************************************************************************************************************
* CPI data for services sector
* to be used in robustness checks: CPI services in place of PPI services
************************************************************************************************************

* Start from an empty dataset and disable output paging so the script runs unattended.
clear
set more off

* Every path below hangs off the global rootfolder. It is not set in this file
* (the line below is commented out), so it must be defined before this script
* runs, e.g. by a calling do-file or by uncommenting and filling in the line.
*global rootfolder ""
if `"$rootfolder"' == "" {
	* Without this guard the paths would silently resolve to \data\4_CPI at the drive root.
	display as error "global rootfolder is not set; define it before running this do-file"
	exit 198
}

* Input folder for the raw CPI files; outputs go to its "output" subfolder.
global cpifolder "$rootfolder\data\4_CPI"
* capture: mkdir errors if the folder already exists; the cd below fails if it is truly missing.
capture mkdir "$cpifolder\output"
cd "$cpifolder\output"

************************************************************************************************************
* Eurostat
************************************************************************************************************
* Read the Eurostat HICP monthly index extract, taking variable names from row 1.
import delimited "$cpifolder\prc_hicp_midx.csv", varnames(1) clear

* The first column packs unit, coicop and geo into one comma-separated key: split it.
split unitcoicopgeotime, parse(",")
rename (unitcoicopgeotime1 unitcoicopgeotime2 unitcoicopgeotime3) (unit coicop geo)
drop unitcoicopgeotime
order unit coicop geo
foreach key in unit coicop geo {
	label variable `key' "`key'"
}

* Keep only rows with unit code "I05" and coicop code "SERV".
keep if unit == "I05" & coicop == "SERV"
drop unit coicop

* Rename every data column to "Y" + its variable label (the code relies on the
* label holding the period text, e.g. 2015M01). geo is left as is.
foreach col of varlist _all {
	if "`col'" == "geo" continue
	local lbl : variable label `col'
	rename `col' Y`lbl'
}

* Convert to numeric, stripping the listed flag characters and the ":" marker.
destring _all, replace ignore("d" "r" "p" "s" "c" "ei" "e" ":" "cp" "ce")

* Recode two geo codes and drop the aggregate (non-country) entries.
replace geo = "GR" if geo == "EL"
replace geo = "GB" if geo == "UK"
drop if inlist(geo, "EA", "EA18", "EA19", "EEA", "EU", "EU28")

* Y2015M01 -> Y201501, so the suffix is a numeric yyyymm key for reshape.
rename Y*M* Y**
reshape long Y, i(geo) j(date)
keep if Y < .
rename (geo Y) (country cpi_serv)

* Turn the yyyymm integer into a Stata monthly date.
gen month = ym(floor(date/100), mod(date, 100))
format month %tm
drop date

order country month cpi_serv
save "CPIserv_Eurostat.dta", replace

************************************************************************************************************
* BLS - NSA 
************************************************************************************************************
import delimited "$cpifolder\cudata2Summaries.txt", clear

* Keep one series only; series_id is trimmed before the comparison.
* CUUR0000SAS,1982-84=100	Services in U.S. city average, all urban consumers, not seasonally adjusted
keep if trim(series_id) == "CUUR0000SAS"
drop series_id

* The month number is the last two characters of period; "M13" rows are discarded.
gen mm = substr(period, -2, .)
destring mm, replace
drop if period == "M13"
drop if year < 1980

* Build the monthly date and reduce the data to country / month / cpi_serv.
gen month = ym(year, mm)
format month %tm
drop footnote_codes period mm year
rename value cpi_serv
gen country = "US"
order country month cpi_serv
drop if month == .
save "CPIserv_BLS.dta", replace

************************************************************************************************************
* STATCAN
************************************************************************************************************
import delimited "$cpifolder\03260020-eng.csv", clear

* Keep the national services series only.
keep if geo == "Canada" & comm == "Services"

* ref_date is read as text: first four characters = year, last two = month.
gen yy = substr(ref_date, 1, 4)
gen mm = substr(ref_date, -2, .)
destring mm yy, replace
gen month = ym(yy, mm)
format month %tm

* Reduce the data to country / month / cpi_serv.
drop geographicalclassification comm vector coordinate ref_date geo yy mm
rename value cpi_serv
gen country = "CA"
order country month cpi_serv
save "CPIserv_StatCan.dta", replace

************************************************************************************************************
* Datastream
************************************************************************************************************
* Australia interpolated; China and Russia were joined in Matlab to obtain the
* longest possible series.
*
* The Datastream files and the Eurostat+Datastream joined files share one layout
* and go through identical steps, so the steps live in two helper programs.

* Helper: read one wide CSV (headers in the first row, one column per series),
* reshape it to long form (country, date, cpi_serv, year, month) and save it.
*   csvfile - path of the CSV file to read
*   dtafile - name of the .dta file to write (replaced if present)
capture program drop cpi_wide_to_long
program define cpi_wide_to_long
	args csvfile dtafile

	* Read without variable names: the header row arrives as observation 1.
	insheet using "`csvfile'", clear delim(",") non

	* Name each column "v" + its header text so reshape can use the header as key.
	foreach dd of varlist _all {
		local newname = `dd'[1]
		rename `dd' v`newname'
	}

	rename vDATE date

	* Drop the header row and any rows without a date.
	drop in 1
	drop if mi(date)
	reshape long v, i(date) j(ds_code) string
	rename v cpi_serv

	replace ds_code = strupper(ds_code)
	destring cpi_serv, replace ignore("NA")

	* Generate the month variable: characters 1-4 of date are the year,
	* characters 5-6 the month.
	gen stub3 = substr(date,1,4)
	gen stub2 = substr(date,5,2)
	destring stub*, replace
	drop date
	gen date = ym(stub3, stub2)
	format %tm date
	rename stub3 year
	rename stub2 month
	sort date
	rename ds_code country

	compress
	save "`dtafile'", replace
end

* Helper: stack the per-country files <stub><cc>.dta in the order given, drop
* missing values and save the result as country / month / cpi_serv.
*   countries - list of country codes; the first is loaded, the rest appended
*   stub()    - file name prefix of the per-country .dta files
*   saving()  - name of the combined .dta file to write (replaced if present)
capture program drop cpi_stack_countries
program define cpi_stack_countries
	syntax anything(name=countries), stub(string) saving(string)

	gettoken first rest : countries
	use "`stub'`first'.dta", clear
	foreach cc of local rest {
		append using "`stub'`cc'.dta"
	}
	drop if mi(cpi_serv)
	drop year month
	rename date month
	order country month cpi_serv
	compress
	save "`saving'", replace
end

* Datastream-only countries (BR ID IN are not used). The list is given in the
* order in which the files are stacked; each import is independent of the others.
local ds_countries AU CN JP KR MX RU TW
foreach cc of local ds_countries {
	cpi_wide_to_long "$cpifolder\DS_CPI_`cc'_m.csv" "CPI_Datastream_`cc'.dta"
}

* Merge all countries together
cpi_stack_countries `ds_countries', stub(CPI_Datastream_) saving(CPIserv_Datastream.dta)

************************************************************************************************************
* EUROSTAT + Datastream (joined in Matlab)
************************************************************************************************************
local joined_countries AT BE BG CZ DK ES FR GB HU IE IT NL PT RO SI SE TR
foreach cc of local joined_countries {
	cpi_wide_to_long "$cpifolder\DSestatJoined_CPI_`cc'_m.csv" "CPI_DsEstatJoined_`cc'.dta"
}

* Merge all countries together
cpi_stack_countries `joined_countries', stub(CPI_DsEstatJoined_) saving(CPIserv_DsEstatJoined.dta)


************************************************************************************************************
* Combine all
************************************************************************************************************
* Start from the Eurostat series and stack the other single-source files on top.
use "CPIserv_Eurostat.dta", clear
foreach src in BLS StatCan Datastream {
	append using "CPIserv_`src'.dta"
}

* Before appending the "DS + Estat" series, drop the short series of the same
* countries so that no country-month appears twice.
* (inlist() accepts a limited number of string arguments, hence three calls.)
drop if inlist(country, "AT", "BE", "BG", "CZ", "DK", "ES")
drop if inlist(country, "FR", "GB", "HU", "IE", "IT", "NL")
drop if inlist(country, "PT", "RO", "SI", "SE", "TR")

append using "CPIserv_DsEstatJoined.dta"

* Show the number of observations per country.
tab country
rename month date
save "CPIserv.dta", replace

/*
encode country,gen(cx)
xtset cx date
xtline cpi_serv
*/


* Next: return to the data folder.
cd "$rootfolder\data"
